//
// Created by meow star on 2019/9/25.
//

#include "word2vec.h"
#include <fstream>
#include <plog/Log.h>
#include <define.h>
/**
 * Loads word vectors from the tab-separated text file at `_path` into `_mp_w2v`.
 *
 * Every non-empty line is split on '\t'; the first field is the word and the
 * remaining fields are parsed as the vector components. A word is stored only
 * if it is a key of `keywords` and is not already present in `_mp_w2v` (the
 * first occurrence wins). While `_vec_len` is 0 the first accepted row defines
 * it; afterwards rows with a different number of components are reported on
 * stdout and skipped.
 *
 * @param keywords  words whose vectors should be loaded (only the keys are used).
 * @return -1 if the file cannot be opened; otherwise the number of non-empty
 *         lines read (NOT the number of vectors actually stored).
 */
int WordVectorLib::load_vectors(unordered_map<string, int>& keywords){
    ifstream file(_path, ios::in);
    if (!file.is_open())
        return -1;

    int count = 0;
    string line;
    while (getline(file, line)) {
        if (line.empty())
            continue;
        ++count;
        if (count % 1000 == 0){
            PLOG_INFO << "loading vectors, count=" << count <<", load="<<_mp_w2v.size();
        }

        // NOTE(review): the last character of every line is dropped
        // unconditionally -- presumably a trailing delimiter or '\r' in the
        // vector file; confirm against the actual file format.
        line.erase(line.length() - 1);

        const vector<string> chunks = StringUtils::split(line, '\t');
        // A one-character line is empty after the strip above; never index
        // into an empty split result.
        if (chunks.empty())
            continue;

        const string& key = chunks[0];
        if (keywords.find(key) == keywords.end())
            continue;   // not a requested keyword
        if (_mp_w2v.find(key) != _mp_w2v.end())
            continue;   // already loaded, keep the first occurrence

        // Everything after the key is a vector component.
        const size_t dim = chunks.size() - 1;
        if (_vec_len == 0){
            _vec_len = dim;
        } else if (_vec_len != dim){
            // Explicit casts keep the arguments in sync with the %d specifiers.
            printf("error, vec length %d != %d\n", static_cast<int>(_vec_len), static_cast<int>(dim));
            continue;
        }

        // The key is known to be absent, so operator[] creates the slot and the
        // components are parsed straight into it (no temporary vector copy).
        vector<float>& wv = _mp_w2v[key];
        wv.reserve(dim);
        for (size_t i = 1; i < chunks.size(); ++i){
            wv.push_back(static_cast<float>(atof(chunks[i].c_str())));
        }
    }
    return count;
}
/**
 * Looks up the stored vector for `key` in `_mp_w2v`.
 *
 * @param key  word to look up.
 * @return pointer to the vector held inside `_mp_w2v`, or a null pointer when
 *         the word has no entry. The pointer refers to the map's own element,
 *         not to a copy.
 */
vector<float>* WordVectorLib::get_vector(const string& key){
    auto found = _mp_w2v.find(key);
    return (found != _mp_w2v.end()) ? &found->second : nullptr;
}

/**
 * Accumulates the weighted vectors of `keywords` into `result`.
 *
 * For each (keyword, weight) pair the keyword's vector is fetched with
 * get_vector(). While `result` is empty it is seeded with a copy of the vector
 * passed through MathUtils::vector_multiply with the weight; afterwards vectors
 * are combined via MathUtils::add_vector(result, vec, weight).
 * Keywords without a loaded vector, and vectors for which add_vector returns
 * -1, are logged and skipped.
 *
 * @param keywords  keyword -> weight map.
 * @param result    in/out accumulator; a non-empty `result` passed in is added to.
 * @return always 0.
 */
int WordVectorLib::calc_keywords_vector(const unordered_map<string, float> &keywords, vector<float> &result) {
    for (const auto& entry : keywords){
        vector<float>* vec = get_vector(entry.first);
        if (vec == nullptr){
            // get_vector() yields a null pointer for unknown words; dereferencing
            // it would be undefined behaviour, so skip the keyword instead.
            PLOG_WARNING << "no vector loaded, skip keyword, keyword=" << entry.first;
            continue;
        }

        if (result.empty()){
            result = *vec;
            MathUtils::vector_multiply(result, entry.second);
        } else if (MathUtils::add_vector(result, *vec, entry.second) == -1){
            PLOG_WARNING << "wrong vector size, skip keyword, keyword=" << entry.first << ", vecsize=" << vec->size();
        }
    }
    return 0;
}
int WordVectorLib::calc_keywords_vector(const vector<string>& keywords, vector<float>& result){
    vector<float>* vec = NULL;
    int ret = 0;
    for(size_t i = 0; i < keywords.size(); ++ i){
        vec = get_vector(keywords[i]);
        if (result.empty()){
            result = *vec;
        }else{
            ret = MathUtils::add_vector(result, *vec);
            if (ret  == -1){
                PLOG_WARNING << "wrong vector size, skip keyword, keyword=" << keywords[i] << ", vecsize=" << vec->size();
                continue;
            }
        }
    }
    return 0;
}
